Velocity data control

Author

Mariana Morales

1 Code

This code was created using RStudio version 2024.12.1 by generating a Quarto document.

Mostrar código
# Cargar librerías
library(dplyr)
library(lubridate)
library(ggplot2)
library(readr)
library(plotly)
library(zoo)
Mostrar código
# Load the raw sensor export (semicolon-separated, decimal comma).
# NOTE(review): the path is absolute and machine-specific; the report can only
# be re-rendered on a machine where this file exists at this location.
datos <- read.table(
  "C:\\Users\\marin\\Documents\\Trabajo\\Dayos Guillaume\\Rstudio quarto\\Datos sensores3.csv",
  sep = ";",
  dec = ",",
  header = TRUE
)

# Velocity: force to numeric (anything that cannot be parsed becomes NA).
datos$Vitesse <- as.numeric(datos$Vitesse)

# Water level: if the column was read as text, replace decimal commas with
# dots BEFORE converting. Converting first would turn every comma-decimal
# value into NA, leaving nothing for the replacement to repair. A column that
# is already numeric is left untouched.
if (!is.numeric(datos$VEGA_E)) {
  datos$VEGA_E <- as.numeric(gsub(",", ".", datos$VEGA_E, fixed = TRUE))
}

# Keep an unfiltered copy of the velocity so the effect of the filters
# applied later can be compared against the raw series.
datos$Vitesse_original <- datos$Vitesse

2 Velocity filters

2.1 Repeated values filter

If there are more than three consecutive repeated values, they will be replaced with NaN.

Mostrar código
# Run-length encode the velocity series: each run of identical consecutive
# readings is described by its value and its length.
rle_vitesse <- rle(datos$Vitesse)

# Flag every run longer than three readings, then expand the per-run flags
# back to one logical value per observation.
run_is_repeated <- rle_vitesse$lengths > 3
to_replace <- rep(run_is_repeated, times = rle_vitesse$lengths)

# Blank out all readings that belong to a flagged run.
datos$Vitesse <- replace(datos$Vitesse, to_replace, NaN)

2.2 Outlier filter

A centered moving average is computed over a window of 4 measurements (40 minutes, given the 10-minute temporal resolution of the data). If the relative difference between a value and the moving average is greater than 30%, the value is considered an outlier and replaced with NaN.

Mostrar código
# Work on a numeric velocity column.
datos$Vitesse <- as.numeric(datos$Vitesse)

# Centered rolling mean over 4 readings. Positions where a full window is not
# available are filled with NA, and because mean() is called without na.rm,
# any window that contains an NA/NaN reading also yields a missing mean.
media_movil <- zoo::rollapply(
  datos$Vitesse,
  width = 4,
  FUN = mean,
  align = "center",
  fill = NA
)

# Deviation of each reading from its rolling mean, relative to that mean.
desviacion_absoluta <- abs(datos$Vitesse - media_movil)
diferencia_relativa <- desviacion_absoluta / media_movil

# A reading is an outlier when it deviates by more than 30 %.
# Where the relative difference is missing the flag is NA (no decision).
outliers <- diferencia_relativa > 0.3

# Blank out the flagged readings; positions with an NA flag are left as-is.
datos$Vitesse <- replace(datos$Vitesse, which(outliers), NaN)

2.3 Physical limits filter

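The maximum surface velocity in a natural river is 7 m/s and the minimum recommended limit is 0.02 m/s. Since the velocity data are expressed in mm/s, the limits applied in the code are 20 mm/s and 7000 mm/s. Any value outside this range will be replaced with NaN.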

Mostrar código
# Flag readings below 20 or above 7000 (the 0.02 m/s and 7 m/s limits from the
# text, expressed in the mm/s used on the plot axes) and blank them out.
# Readings that are already missing give an NA flag and are left untouched.
fuera_de_rango <- datos$Vitesse < 20 | datos$Vitesse > 7000
datos$Vitesse <- replace(datos$Vitesse, which(fuera_de_rango), NaN)

2.4 SNR filter

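Data with an SNR value below 10 dBm may be imprecise, and those below 6 dBm are considered unreliable. The filter applied here uses the stricter threshold: every velocity whose SNR is below 10 is replaced with NaN.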

Mostrar código
# Make sure the signal-to-noise column is numeric before comparing it.
datos$SNR_avg <- as.numeric(datos$SNR_avg)

# Blank out the velocity wherever the SNR is below 10.
# Rows with a missing SNR are not flagged and keep their velocity.
snr_insuficiente <- which(datos$SNR_avg < 10)
datos$Vitesse[snr_insuficiente] <- NaN

# Write the fully filtered table to disk as a comma-delimited file.
readr::write_delim(datos, "datos_velocidad_filtrados.csv", delim = ",")

3 Plots

3.1 Filtered surface velocity and original surface velocity

Mostrar código
# Read the filtered file back in, keeping every column as text.
# Note: `filtrados` is not referenced again in the code shown in this report;
# all later chunks keep working on `datos`.
filtrados <- readr::read_delim(
  "datos_velocidad_filtrados.csv",
  delim = ",",
  col_types = readr::cols(.default = "c")
)

# (Disabled) drop every row that contains a missing value:
#datos_sin_NaN <- drop_na(datos)

# Parse the day/month/year hour:minute timestamps into POSIXct.
# No time zone is given, so the session's local time zone is used.
datos[["DateTime"]] <- as.POSIXct(datos[["DateTime"]], format = "%d/%m/%Y %H:%M")
Mostrar código
# Interactive time-series plot comparing the raw and the filtered velocity.
# The figure size is set here, in plot_ly(): passing width/height to layout()
# is deprecated in plotly for R and raises a warning.
fig <- plot_ly(width = 800, height = 450)

# Raw velocity: hidden by default, can be switched on from the legend.
fig <- fig %>% add_markers(
  x = datos$DateTime,
  y = datos$Vitesse_original,
  name = "Original Surface velocity",
  marker = list(
    color = "gray",
    size = 3
  ),
  visible = "legendonly"
)

# Filtered velocity: shown by default.
fig <- fig %>% add_markers(
  x = datos$DateTime,
  y = datos$Vitesse,
  name = "Surface velocity",
  marker = list(
    color = "blue",
    size = 3
  )
)

fig <- fig %>% layout(
  title = "Filtered surface velocity and original surface velocity",
  xaxis = list(title = "Date", type = "date"),
  yaxis = list(
    title = "Surface velocity (mm/s)",
    side = "left",
    showgrid = FALSE
  ),
  # NOTE(review): no trace in this chunk is assigned to the secondary axis,
  # so these settings currently have nothing to apply to.
  yaxis2 = list(
    side = "right",
    overlaying = "y",
    showgrid = TRUE,
    title_standoff = 30,  # Distance between the axis title and the axis
    ticklen = 6,          # Length of the tick marks
    tickangle = 0         # Angle of the tick labels
  ),
  legend = list(
    orientation = "h",     # horizontal
    x = 0,                 # aligned to the left
    y = -0.2               # below the X axis
  ),
  hovermode = "x",
  margin = list(r = 100)  # Extra right margin reserved for the secondary axis
)

# Display the interactive plot
fig

3.2 Percentage of data converted to NaN (whole period)

Mostrar código
# Count the non-missing values left in 'Vitesse' after the filters
# (is.na() is TRUE for both NA and NaN)
cantidad_vitesse <- sum(!is.na(datos$Vitesse))
cantidad_vitesse
[1] 18209
Mostrar código
# Count the non-missing values in the unfiltered copy 'Vitesse_original'
cantidad_vitesse_original <- sum(!is.na(datos$Vitesse_original))
cantidad_vitesse_original
[1] 38483
Mostrar código
# Number of values removed by the filters: difference between the two counts
diferencia <- abs(cantidad_vitesse - cantidad_vitesse_original)

# Express that difference as a percentage of the original non-missing count
porcentaje_diferencia <- (diferencia / cantidad_vitesse_original) * 100

# Print the results
print(paste("Diferencia en cantidad de datos no NA:", diferencia))
[1] "Diferencia en cantidad de datos no NA: 20274"
Mostrar código
print(paste("Porcentaje de diferencia:", round(porcentaje_diferencia, 2), "%"))
[1] "Porcentaje de diferencia: 52.68 %"

3.3 Percentage of data converted to NaN (period: April 27 - November 3)

Mostrar código
# Make sure the 'Date' column is of class Date
# NOTE(review): as.Date() is called without a format, so 'Date' presumably
# holds ISO-style (yyyy-mm-dd) values - confirm against the input file
datos$Date <- as.Date(datos$Date)

# Keep only the rows between 27 April and 3 November 2024 (both inclusive)
datos_filtrados <- subset(datos, Date >= as.Date("2024-04-27") & Date <= as.Date("2024-11-03"))

# Count the non-missing values left in 'Vitesse' within that period
# (this overwrites the whole-period count computed in the previous section)
cantidad_vitesse <- sum(!is.na(datos_filtrados$Vitesse))
cantidad_vitesse
[1] 18209
Mostrar código
# Count the non-missing values in 'Vitesse_original' within that period
cantidad_vitesse_original <- sum(!is.na(datos_filtrados$Vitesse_original))
cantidad_vitesse_original
[1] 25741
Mostrar código
# Number of values removed by the filters within the period
diferencia <- abs(cantidad_vitesse - cantidad_vitesse_original)

# Express that difference as a percentage of the original non-missing count
porcentaje_diferencia <- (diferencia / cantidad_vitesse_original) * 100

# Print the results
print(paste("Diferencia en cantidad de datos no NA (periodo):", diferencia))
[1] "Diferencia en cantidad de datos no NA (periodo): 7532"
Mostrar código
print(paste("Porcentaje de diferencia (periodo):", round(porcentaje_diferencia, 2), "%"))
[1] "Porcentaje de diferencia (periodo): 29.26 %"

3.4 Surface velocity vs Water Level

Mostrar código
# Keep only the rows where timestamp, water level and velocity are all
# present, and tag each row with its month (shown in the hover text).
columnas_grafico <- c("DateTime", "VEGA_E", "Vitesse")
df <- na.omit(datos[, columnas_grafico])
df$Mes <- lubridate::month(df$DateTime, label = TRUE, abbr = TRUE)

# Hover text shown for every point.
texto_hover <- ~paste("Date:", DateTime, "<br>Month:", Mes, "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse)

# Scatter plot of velocity against water level. No colour grouping is mapped
# in this figure; the palette argument is kept exactly as in the other plots.
fig <- df %>%
  plot_ly(
    x = ~VEGA_E,
    y = ~Vitesse,
    type = "scatter",
    mode = "markers",
    colors = "Paired",
    marker = list(size = 6),
    text = texto_hover,
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Surface velocity vs Water Level",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Render the figure
fig

3.5 Surface velocity vs Water Level colored by month

Mostrar código
# Keep only the rows where timestamp, water level and velocity are all
# present, and tag each row with its month (used for colour and hover text).
columnas_grafico <- c("DateTime", "VEGA_E", "Vitesse")
df <- na.omit(datos[, columnas_grafico])
df$Mes <- lubridate::month(df$DateTime, label = TRUE, abbr = TRUE)

# Hover text shown for every point.
texto_hover <- ~paste("Date:", DateTime, "<br>Month:", Mes, "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse)

# Scatter plot of velocity against water level, one colour per month.
fig <- df %>%
  plot_ly(
    x = ~VEGA_E,
    y = ~Vitesse,
    type = "scatter",
    mode = "markers",
    color = ~Mes,
    colors = "Paired",
    marker = list(size = 6),
    text = texto_hover,
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Surface velocity vs Water Level colored by month",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Render the figure
fig

3.6 Surface velocity vs Water Level colored by month and connected

Mostrar código
# Keep only the rows where timestamp, water level and velocity are all
# present, and tag each row with its month (used for colour and hover text).
df <- na.omit(datos[, c("DateTime", "VEGA_E", "Vitesse")])
df$Mes <- lubridate::month(df$DateTime, label = TRUE, abbr = TRUE)

# Sort chronologically so the connecting line visits the points in time order.
df <- df[order(df$DateTime), ]

# Scatter plot of velocity against water level, one colour per month, with
# the points of each month joined by a line.
fig <- plot_ly(
  data = df,
  x = ~VEGA_E,
  y = ~Vitesse,
  type = "scatter",
  mode = "lines+markers",  # markers joined by a line
  color = ~Mes,            # one trace per month
  colors = "Paired",       # colour palette
  marker = list(size = 6),
  text = ~paste("Date:", DateTime, "<br>Month:", Mes, "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse),
  hoverinfo = "text"
)

# Layout. The title names the connected variant so this figure can be told
# apart from the markers-only plot coloured by month.
fig <- fig %>% layout(
  title = "Surface velocity vs Water Level colored by month and connected",
  xaxis = list(title = "Water level (m)"),
  yaxis = list(title = "Surface velocity (mm/s)"),
  hovermode = "closest"
)

# Show plot
fig

3.7 Surface velocity vs Water Level colored by event (< 0.9 m)

Mostrar código
# Split the record into flood events and label every row with the start time
# of the event it belongs to.
#
# A flood row is one whose water level (VEGA_E) exceeds 0.9. A new event
# starts at every row that is in flood while the previous row was not (a
# missing water level counts as "not in flood", so a gap inside a flood starts
# a new event once valid flood readings resume). An event lasts until the next
# one starts, so it also covers the recession below the threshold.
#
# Fixes relative to the previous row-by-row loop:
#   * the first row above the threshold now opens event 1 instead of being
#     labelled as part of the initial block, so a one-row first flood no longer
#     leaves the following rows unlabelled;
#   * the code is vectorised and works on an empty table (no 1:nrow() loop);
#   * event labels are formatted explicitly, so they always carry seconds
#     like the hard-coded initial label.

# Logical column marking the rows above the flood threshold
datos$Crecida <- datos$VEGA_E > 0.9

# TRUE only for rows known to be in flood (missing water level -> FALSE)
en_crecida <- !is.na(datos$Crecida) & datos$Crecida

# Flood state of the previous row (FALSE before the first row)
crecida_previa <- c(FALSE, en_crecida)[seq_along(en_crecida)]

# Rows where a new event starts, and the running event number of every row
# (0 = initial block, before the first flood)
inicio_evento <- en_crecida & !crecida_previa
evento_id <- cumsum(inicio_evento)

# One label per event: the timestamp of its first row
etiquetas_inicio <- format(datos$DateTime[inicio_evento], "%Y-%m-%d %H:%M:%S")

# Initialise the output columns
datos$Evento <- rep(NA_integer_, nrow(datos))
datos$EtiquetaEvento <- rep(NA_character_, nrow(datos))

# Initial block: every row before the first flood gets event 0 and a fixed
# label.
# NOTE(review): the label is hard-coded; presumably it is the first timestamp
# of the record - confirm if the input file changes.
bloque_inicial <- evento_id == 0L
datos$Evento[bloque_inicial] <- 0L
datos$EtiquetaEvento[bloque_inicial] <- "2024-05-09 15:10:00"

# Numbered events: rows with a missing water level stay unlabelled
en_evento <- !bloque_inicial & !is.na(datos$Crecida)
datos$Evento[en_evento] <- evento_id[en_evento]
datos$EtiquetaEvento[en_evento] <- etiquetas_inicio[evento_id[en_evento]]

# Convert the label to a factor so plotly treats it as a discrete grouping
datos$EtiquetaEvento <- factor(datos$EtiquetaEvento)

# Keep only the rows that can be plotted: level, velocity and label present
datos_plot <- subset(datos, !is.na(VEGA_E) & !is.na(Vitesse) & !is.na(EtiquetaEvento))
Mostrar código
# Hover text shown for every point.
texto_hover <- ~paste("Date:", DateTime, "<br>Event since:", EtiquetaEvento, "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse)

# Scatter plot of velocity against water level, one colour per flood event.
fig <- datos_plot %>%
  plot_ly(
    x = ~VEGA_E,
    y = ~Vitesse,
    type = "scatter",
    mode = "markers",
    color = ~EtiquetaEvento,
    colors = "Paired",
    marker = list(size = 6),
    text = texto_hover,
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Surface velocity vs Water level colored by event (< 0.9 m)",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Render the figure
fig

3.8 Surface velocity vs Water Level colored by event (< 0.9 m) and connected

Mostrar código
# Hover text shown for every point.
texto_hover <- ~paste("Date:", DateTime, "<br>Event since:", EtiquetaEvento, "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse)

# Same event-coloured scatter as the previous figure, but with the points of
# each event joined by a line.
fig <- datos_plot %>%
  plot_ly(
    x = ~VEGA_E,
    y = ~Vitesse,
    type = "scatter",
    mode = "lines+markers",
    color = ~EtiquetaEvento,
    colors = "Paired",
    marker = list(size = 6),
    text = texto_hover,
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Surface velocity vs Water level colored by event (< 0.9 m)",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Render the figure
fig